Wednesday, January 28, 2009

Changing HBase Tables in Code

One thing I did early on was to store the HBase table descriptions in an external XML file, like a database schema. For example:
<table>
<name>documents</name>
<table_name>docs</table_name>
<description>Stores the actual documents.</description>
<column_family>
<name>contents</name>
<description>Holds the actual raw data.</description>
<!-- Default: 3 -->
<max_versions></max_versions>
<!-- Default: DEFAULT_COMPRESSION_TYPE -->
<compression_type></compression_type>
<!-- Default: false -->
<in_memory></in_memory>
<!-- Default: false -->
<block_cache_enabled/>
<!-- Default: -1 (forever) -->
<time_to_live/>
<!-- Default: 2147483647 -->
<max_value_length></max_value_length>
<!-- Default: DEFAULT_BLOOM_FILTER_DESCRIPTOR -->
<bloom_filter></bloom_filter>
</column_family>
<column_family>
<name>mimetype</name>
<description>Holds the MIME type of the data.</description>
<!-- Default: 3 -->
<max_versions></max_versions>
<!-- Default: DEFAULT_COMPRESSION_TYPE -->
<compression_type></compression_type>
<!-- Default: false -->
<in_memory></in_memory>
<!-- Default: false -->
<block_cache_enabled/>
<!-- Default: -1 (forever) -->
<time_to_live/>
<!-- Default: 2147483647 -->
<max_value_length></max_value_length>
<!-- Default: DEFAULT_BLOOM_FILTER_DESCRIPTOR -->
<bloom_filter></bloom_filter>
</column_family>
</table>

While this adds extra work to maintain the schemas, it does give a central place where all the metadata about the HBase tables is stored.

In the code I added the functionality to read these XML files into internal classes that represent each table. For example:
/**
* Describes a table independently of HBase, to be used by the calling class.
*/
public class TableSchema {

private String name = null;
private String description = null;
private String tableName = null;
private HashMap<String, ColumnDefinition> columns = new HashMap<String, ColumnDefinition>();

public String getName() {
return name;
}

public void setName(String name) {
this.name = name;
}

public String getDescription() {
return description;
}

public void setDescription(String description) {
this.description = description;
}

public String getTableName() {
return tableName;
}

public void setTableName(String tableName) {
this.tableName = tableName;
}

public void addColumn(ColumnDefinition column) {
columns.put(column.getName(), column);
}

public Collection<ColumnDefinition> getColumns() {
return columns.values();
}

public ColumnDefinition getColumnDefinition(String name) {
return columns.get(name);
}

@Override
public String toString() {
return "name -> " + name + "\n description -> " + description +
"\n tableName -> " + tableName + "\n columns -> " + columns;
}

} // TableSchema
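
The actual XML parsing is not shown here; just for illustration, a minimal sketch of reading such a file with the standard DOM API could look like the following (TableSchemaReader is a made-up name, not the exact code I use, and only a couple of the optional elements are handled - the rest follow the same pattern):
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

public class TableSchemaReader {

  public TableSchema read(File schemaFile) throws Exception {
    Document doc = DocumentBuilderFactory.newInstance()
      .newDocumentBuilder().parse(schemaFile);
    Element table = doc.getDocumentElement();
    TableSchema schema = new TableSchema();
    schema.setName(text(table, "name"));
    schema.setTableName(text(table, "table_name"));
    schema.setDescription(text(table, "description"));
    NodeList families = table.getElementsByTagName("column_family");
    for (int i = 0; i < families.getLength(); i++) {
      Element family = (Element) families.item(i);
      ColumnDefinition col = new ColumnDefinition();
      col.setName(text(family, "name"));
      col.setTableName(schema.getTableName());
      col.setDescription(text(family, "description"));
      // empty optional elements are skipped so the HBase defaults stay in place
      String maxVersions = text(family, "max_versions");
      if (maxVersions != null) col.setMaxVersions(Integer.parseInt(maxVersions));
      String inMemory = text(family, "in_memory");
      if (inMemory != null) col.setInMemory(Boolean.parseBoolean(inMemory));
      // ... the remaining optional elements are handled the same way
      schema.addColumn(col);
    }
    return schema;
  }

  // returns the trimmed text of the first matching child element, or null if absent/empty
  private String text(Element parent, String tag) {
    NodeList nodes = parent.getElementsByTagName(tag);
    if (nodes.getLength() == 0) return null;
    String value = nodes.item(0).getTextContent().trim();
    return value.length() > 0 ? value : null;
  }
}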

In addition I added a function to convert these instances into those that HBase understands. I also added a generic helper to get a table reference:
/**
* Converts the XML based schema to a version HBase can take natively.
*
* @param schema The schema describing the table.
* @return The converted schema as a HBase object.
*/
private HTableDescriptor convertSchemaToDescriptor(TableSchema schema) {
HTableDescriptor desc;
desc = new HTableDescriptor(schema.getTableName());
Collection<ColumnDefinition> cols = schema.getColumns();
for (ColumnDefinition col : cols) {
HColumnDescriptor cd = new HColumnDescriptor(Bytes.toBytes(col.getColumnName()), col.getMaxVersions(),
col.getCompressionType(), col.isInMemory(), col.isBlockCacheEnabled(), col.getMaxValueLength(),
col.getTimeToLive(), col.isBloomFilter());
desc.addFamily(cd);
}
return desc;
} // convertSchemaToDescriptor

/**
* Returns a table descriptor or <code>null</code> if it does not exist.
*
* @param name The name of the table.
* @return The table descriptor or <code>null</code>.
* @throws IOException When the communication to HBase fails.
*/
private HTableDescriptor getHBaseTable(String name) throws IOException {
HTableDescriptor[] tables = _hbaseAdmin.listTables();
for (int i = 0; i < tables.length; i++)
if (tables[i].getNameAsString().equals(name)) return tables[i];
return null;
} // getHBaseTable

Now I can ask for a table, and if it does not exist I can create it. But what if it already exists? Then I face the problem of checking whether the table schema differs from the table that is deployed. If it is the same, fine, simply load it; but if it is different, I have to compare the column definitions and change those columns that have changed. Here is my approach:
/**
* Returns a HBase table. The table is either opened, created or updated.
*
* @param schema The external schema describing the table.
* @param create True means create table if non existent.
* @return The internal table container.
* @throws IOException When the table creation fails.
*/
private TableContainer createTable(TableSchema schema, boolean create)
throws IOException {
TableContainer res = new TableContainer();
res.setSchema(schema);
HTableDescriptor desc = null;
if (_hbaseAdmin.tableExists(schema.getTableName())) {
desc = getHBaseTable(schema.getTableName());
// only check for changes if we are allowed to
if (create) {
HTableDescriptor d = convertSchemaToDescriptor(schema);
// compute differences
List<HColumnDescriptor> modCols = new ArrayList<HColumnDescriptor>();
for (HColumnDescriptor cd : desc.getFamilies()) {
HColumnDescriptor cd2 = d.getFamily(cd.getName());
if (cd2 != null && !cd.equals(cd2)) modCols.add(cd2);
}
List<HColumnDescriptor> delCols = new ArrayList<HColumnDescriptor>(desc.getFamilies());
delCols.removeAll(d.getFamilies());
List<HColumnDescriptor> addCols = new ArrayList<HColumnDescriptor>(d.getFamilies());
addCols.removeAll(desc.getFamilies());
// check if we had a column that was changed, added or deleted
if (modCols.size() > 0 || addCols.size() > 0 || delCols.size() > 0) {
// yes, then disable table and iterate over changes
_hbaseAdmin.disableTable(schema.getTableName());
for (HColumnDescriptor col : modCols)
_hbaseAdmin.modifyColumn(schema.getTableName(), col.getNameAsString(), col);
for (HColumnDescriptor col : addCols)
_hbaseAdmin.addColumn(schema.getTableName(), col);
for (HColumnDescriptor col : delCols)
_hbaseAdmin.deleteColumn(schema.getTableName(), col.getNameAsString() + ":");
// enable again and reload details
_hbaseAdmin.enableTable(schema.getTableName());
desc = getHBaseTable(schema.getTableName());
}
}
} else if (create) {
desc = convertSchemaToDescriptor(schema);
_hbaseAdmin.createTable(desc);
}
res.setDescription(desc);
HTable table = null;
if (desc != null) table = new HTable(_hbaseConfig, desc.getName());
res.setTable(table);
return res;
} // createTable
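
Wiring it all together could then look roughly like this (just a sketch; the schema would come from one of the XML files shown above, here via the hypothetical reader sketched earlier):
// hypothetical path to the schema file shown above
TableSchema schema = new TableSchemaReader().read(new File("schemas/docs.xml"));
TableContainer container = createTable(schema, true);
HTable table = container.getTable();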

That's it, I guess. Please note that this is my attempt at solving it; I am not sure yet if it works. I will test it as soon as I can and update this post accordingly. But I thought I would throw it out there anyway; who knows, maybe it helps someone, or someone can help me. :)

Oh, for completeness' sake, here is the container class I created to hold my table details:
/**
* Container to hold a table's details.
*/
class TableContainer {

private HTable table;
private HTableDescriptor description;
private TableSchema schema;

public HTable getTable() {
return table;
}

public void setTable(HTable table) {
this.table = table;
}

public HTableDescriptor getDescription() {
return description;
}

public void setDescription(HTableDescriptor description) {
this.description = description;
}

public TableSchema getSchema() {
return schema;
}

public void setSchema(TableSchema schema) {
this.schema = schema;
}

@Override
public String toString() {
return "table -> " + table + ", description -> " + description +
", schema -> " + schema;
}

} // TableContainer

/**
* Describes a column and its features.
*/
public class ColumnDefinition {

/** The divider between the column family name and a label. */
public static final String DIV_COLUMN_LABEL = ":";
/** Default values for HBase. */
private static final int DEF_MAX_VERSIONS = HColumnDescriptor.DEFAULT_VERSIONS;
/** Default values for HBase. */
private static final CompressionType DEF_COMPRESSION_TYPE = HColumnDescriptor.DEFAULT_COMPRESSION;
/** Default values for HBase. */
private static final boolean DEF_IN_MEMORY = HColumnDescriptor.DEFAULT_IN_MEMORY;
/** Default values for HBase. */
private static final boolean DEF_BLOCKCACHE_ENABLED = HColumnDescriptor.DEFAULT_BLOCKCACHE;
/** Default values for HBase. */
private static final int DEF_MAX_VALUE_LENGTH = HColumnDescriptor.DEFAULT_LENGTH;
/** Default values for HBase. */
private static final int DEF_TIME_TO_LIVE = HColumnDescriptor.DEFAULT_TTL;
/** Default values for HBase. */
private static final boolean DEF_BLOOM_FILTER = HColumnDescriptor.DEFAULT_BLOOMFILTER;

private String name;
private String tableName;
private String description;
private int maxVersions = DEF_MAX_VERSIONS;
private CompressionType compressionType = DEF_COMPRESSION_TYPE;
private boolean inMemory = DEF_IN_MEMORY;
private boolean blockCacheEnabled = DEF_BLOCKCACHE_ENABLED;
private int maxValueLength = DEF_MAX_VALUE_LENGTH;
private int timeToLive = DEF_TIME_TO_LIVE;
private boolean bloomFilter = DEF_BLOOM_FILTER;

public String getColumnName() {
return name.endsWith(":") ? name : name + ":";
}

public String getName() {
return name;
}

public void setName(String name) {
this.name = name;
}

public String getTableName() {
return tableName;
}

public void setTableName(String tableName) {
this.tableName = tableName;
}

public String getDescription() {
return description;
}

public void setDescription(String description) {
this.description = description;
}

public int getMaxVersions() {
return maxVersions;
}

public void setMaxVersions(int maxVersions) {
this.maxVersions = maxVersions;
}

public CompressionType getCompressionType() {
return compressionType;
}

public void setCompressionType(CompressionType compressionType) {
this.compressionType = compressionType;
}

public boolean isInMemory() {
return inMemory;
}

public void setInMemory(boolean inMemory) {
this.inMemory = inMemory;
}

/**
* @return Returns the blockCacheEnabled.
*/
public boolean isBlockCacheEnabled() {
return blockCacheEnabled;
}

/**
* @param blockCacheEnabled The blockCacheEnabled to set.
*/
public void setBlockCacheEnabled(boolean blockCacheEnabled) {
this.blockCacheEnabled = blockCacheEnabled;
}

/**
* @return Returns the timeToLive.
*/
public int getTimeToLive() {
return timeToLive;
}

/**
* @param timeToLive The timeToLive to set.
*/
public void setTimeToLive(int timeToLive) {
this.timeToLive = timeToLive;
}

/**
* @return Returns the bloomFilter.
*/
public boolean isBloomFilter() {
return bloomFilter;
}

/**
* @param bloomFilter The bloomFilter to set.
*/
public void setBloomFilter(boolean bloomFilter) {
this.bloomFilter = bloomFilter;
}

public int getMaxValueLength() {
return maxValueLength;
}

public void setMaxValueLength(int maxValueLength) {
this.maxValueLength = maxValueLength;
}

@Override
public String toString() {
return "name -> " + name +
"\n tableName -> " + tableName +
"\n description -> " + description +
"\n maxVersions -> " + maxVersions +
"\n compressionType -> " + compressionType +
"\n inMemory -> " + inMemory +
"\n blockCacheEnabled -> " + blockCacheEnabled +
"\n maxValueLength -> " + maxValueLength +
"\n timeToLive -> " + timeToLive +
"\n bloomFilter -> " + bloomFilter;
} // toString

} // ColumnDefinition

Not much to it obviously, but hey.

Update: I fixed the code to handle added and removed columns properly. The previous version would only handle changed columns.

Tuesday, January 27, 2009

How to use HBase with Hadoop


I have been using Hadoop and HBase for a while now. The "raw" directory dump on the right may give you a rough idea. ;)

With each new release I went through the iterations of the supplied helper classes to scan an HBase table from within a Map/Reduce job. What I did not find was a description of how to use these classes. That has improved thanks to the relentless work of the volunteers and Michael Stack, who put it all together. Without him and the rest of the team HBase would be nowhere. Anyhow, here is my spin on this topic and where I am still not sure how to handle things:

The Hadoop Tool class is the launcher application, and its purpose is to read the command line parameters and then set up a JobConf instance that holds the job details, such as which classes to use to read the input, which mapper and reducer classes to use, which key and value classes each of these produces, and so on. The command line parameters usually specify how many Map and Reduce tasks should be run on the cluster, what table to process, what columns, and so on.

Once the Tool has set up the JobConf, it runs the job on the cluster. With HBase there are special Map/Reduce classes that serve as helpers or starting points for processing HBase tables. They live in the HBase source under the "mapred" package and are named TableInputFormat, TableReduce, TableMap, IdentityTableMap and so on.
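
To make this a bit more tangible, a minimal launcher could look roughly like the sketch below. This is not the code from my jobs: the table name, column list and the MyReduce class are made up for illustration (a possible shape of MyReduce is sketched further down), and it relies on the 0.19-era TableMapReduceUtil helper mentioned again in the notes at the end - the exact signatures may differ slightly between releases:
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.IdentityTableMap;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class MyScanTool extends Configured implements Tool {

  // made-up table and (space separated) column list; normally these come from args
  private static final String TABLE_NAME = "docs";
  private static final String COLUMNS = "contents: mimetype:";

  public int run(String[] args) throws Exception {
    JobConf job = new JobConf(getConf(), MyScanTool.class);
    job.setJobName("scan-" + TABLE_NAME);
    // scan TABLE_NAME and pass the rows straight through the Map phase ...
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, COLUMNS, IdentityTableMap.class,
        ImmutableBytesWritable.class, RowResult.class, job);
    // ... and do the actual work in MyReduce, writing back via TableOutputFormat
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME, MyReduce.class, job);
    JobClient.runJob(job);
    return 0;
  }

  public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new HBaseConfiguration(), new MyScanTool(), args));
  }
}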

After the job has started, the first thing that happens is the preparation of the Map phase, which is done by the InputFormat class. It serves as the filter that reads the raw data and passes it into the Map phase as key/value pairs. For an HBase job this is done by the supplied TableInputFormat class. What it does is split the table you are scanning into chunks that can be handed to the Map instances. By the way, you can only scan a single table at a time, but you can process any columns you want out of that table.

In HBase a table is physically divided into many regions, which are in turn served by different RegionServers. Splitting is done by mapping each split to exactly one region of the table you are scanning. Because of that you may end up with a couple of thousand splits for a single table - I have more than 6,000 regions for one table, for example.

It is recommended by the HBase team to match the number of Map instances to the number of splits (aka table regions). Instead of always having to check that number first, using for example the HBase UI, I have opted to automate the computation of the number of splits to use. I simply ask the table how many "start keys" it knows of. This should equal the number of regions, as each region has its own starting key:
/**
* Computes the number of regions of the given table.
*
* @param tableName The name of the table.
* @return The total number of regions of the table.
* @throws IOException When the table is not created.
*/
private int getNumberOfRegions(String tableName) throws IOException {
// sanity check
if (tableName == null) return -1;
HTable table = new HTable(hbaseConfig, tableName);
byte[][] startKeys = table.getStartKeys();
return startKeys.length;
}
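
In the job setup this number then goes straight into the JobConf (just a one-liner; "job" is the JobConf instance being configured):
// one Map task per region of the scanned table
job.setNumMapTasks(getNumberOfRegions(tableName));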

Each split is one region, and a region holds a start and end key to be processed. As each split is read by a Map instance, an HBase Scanner is created to scan the rows between the split's keys.

Each of these rows is handed to the TableMap class, or rather a class that implements this interface. You can use the supplied IdentityTableMap, since often you simply pass the rows on through the Map step. As per the Map/Reduce process, the rows then get sorted during the next phase and eventually passed on to the TableReduce class.

So what you have now is the row and all the columns that were listed, usually as a parameter to the Tool implementation. For example, if you specified "contents:,mimetype:" then those two column families are handed to you - and in this special case with all labels! If you had specified "contents:en,mimetype:en" then you would have gotten exactly those two columns with that particular label. So leaving out the label defaults to a wildcard over all labels (because the HBase Scanner class implements it that way).
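
Just to illustrate what arrives in a custom map implementation, here is a rough sketch of a TableMap (the class name and logic are made up; for a plain pass-through you would use IdentityTableMap and skip this entirely):
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MyMap extends MapReduceBase
    implements TableMap<ImmutableBytesWritable, RowResult> {

  public void map(ImmutableBytesWritable key, RowResult row,
      OutputCollector<ImmutableBytesWritable, RowResult> output, Reporter reporter)
      throws IOException {
    // the row contains one Cell per requested column, keyed by "family:label"
    for (Map.Entry<byte[], Cell> entry : row.entrySet()) {
      String column = Bytes.toString(entry.getKey()); // e.g. "mimetype:en"
      byte[] value = entry.getValue().getValue();
      // ... inspect or filter the cell here if needed ...
    }
    // pass the row on to the sort/reduce phase, just like IdentityTableMap does
    output.collect(key, row);
  }
}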

In the Reduce phase you perform whatever work you have to do and then pass the result on to the TableOutputFormat class. Here you write out what you need and you are done.

During the process you can call upon counters to count anything you like. That is what I do to count how many documents I have in total and how many there are for each respective target language, etc. At the end of the job run I read the counters and store the values back into HBase or MemCacheDB as metadata.
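
The counters themselves are plain Hadoop counters, nothing HBase specific. Just as an illustration (the enum and its names are made up), using them can be as simple as:
// define the counters once, e.g. in the Tool class
public enum DocCounters { TOTAL, ENGLISH, GERMAN }

// inside map() or reduce(), bump them via the Reporter
reporter.incrCounter(DocCounters.TOTAL, 1);

// back in the Tool, read them after the job has finished
RunningJob running = JobClient.runJob(job);
long total = running.getCounters().getCounter(DocCounters.TOTAL);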

Now the question you may have is, where do I do what work? In the Map or Reduce phase? And for the latter, why do I need a formatter?

I had the same questions. My understanding is that HBase is a special Hadoop Map/Reduce case: because keys in HBase are unique, doing a map first and then sorting the rows so that they can be reduced is not necessary. In fact I have one job where I only use a Map phase, set up like so:
job.setNumReduceTasks(0);
job.setInputFormat(MyTextInputFormat.class);
job.setMapperClass(MyMapper.class);
job.setOutputFormat(NullOutputFormat.class);

So it is a decision you have to make on your own: do I need a particular phase or not? In the job above I am not scanning HBase tables but rather reading a text file stored in Hadoop's DFS, where each line is an update instruction for an existing document stored in HBase. There is no need for sorting or reducing.

As far as HBase scans are concerned, you may want to keep the Input/Split/Map/Sort/Reduce/Output phases; that is also why the excellent HBase team supplied those base classes matching that concept. Usually the IdentityTableMap class is used to pass on the rows and columns, and all the work is done in the Reduce phase.

That leaves one thing: why have both a TableReduce and a TableOutputFormat class? The reason is that in the Reduce you output what needs to be "saved" - but not how. You can therefore run two very similar jobs that only differ in how they save the data, simply by replacing the output format class.
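
To make that concrete, here is a rough sketch of what such a reduce could look like with the 0.19-era classes (the class name, column and value are made up - it just stages a BatchUpdate and leaves the actual saving to whatever output format is configured):
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MyReduce extends MapReduceBase
    implements TableReduce<ImmutableBytesWritable, RowResult> {

  public void reduce(ImmutableBytesWritable key, Iterator<RowResult> values,
      OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter reporter)
      throws IOException {
    while (values.hasNext()) {
      RowResult row = values.next();
      // decide WHAT to save for this row ...
      BatchUpdate update = new BatchUpdate(row.getRow());
      update.put(Bytes.toBytes("contents:processed"), Bytes.toBytes("true"));
      // ... and leave the HOW/WHERE to the output format, e.g. TableOutputFormat
      output.collect(key, update);
    }
  }
}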

Again, I have cases where I do not output anything but instead save back to HBase. I could easily write the records back into HBase in the reduce step, so why pass them on first? I think in some cases this is just common sense or being a "good citizen". I still have code where I am torn as to where to process the final output. Sometimes I lean one way, sometimes the other.

Other notes:
1) With HBase 0.19.0 there is now a central helper class called TableMapReduceUtil, which helps with setting up jobs like so:
job.setMapperClass(MyMap.class);
TableMapReduceUtil.initTableReduceJob(TABLE_NAME, MyReduce.class, job);
...

It helps you set up all the required details for Map and/or Reduce jobs based on the supplied helper classes.

2) Writing back to the same HBase table is OK when doing it in the Reduce phase, as all scanning has concluded in the Map phase beforehand: all rows and columns are saved internally to an intermediate Hadoop SequenceFile, so when you process these and write back to HBase there is no problem with a scanner from the same job still reading the table.

Otherwise it is OK to write to a different HBase table even during the Map phase.

Hope that helps a little.

Monday, January 19, 2009

VServer is not Xen, Part 2

Another oddity about VServer is that it does not have a true init process. Or rather, the whole startup is not what you are used to from other Linux systems.

While you can read about the different Init Styles, there is one crucial issue: the startup scripts, usually located in /etc/rc.<n>, are executed either outside of the VM or inside it, so that you can either "see" the VM starting up from the master or not, respectively. While this is OK and usable for most applications, it has a major problem: you cannot run DJB's daemontools.

This is because while the above startup styles execute the init scripts, they do not execute anything else from the inittab configuration files - most importantly the last line in the following excerpt from /etc/inittab:
...
# Example how to put a getty on a serial line (for a terminal)
#
#T0:23:respawn:/sbin/getty -L ttyS0 9600 vt100
#T1:23:respawn:/sbin/getty -L ttyS1 9600 vt100

# Example how to put a getty on a modem line.
#
#T3:23:respawn:/sbin/mgetty -x0 -s 57600 ttyS3

SV:123456:respawn:/command/svscanboot

The last line is what starts the root daemontools process, which in turn starts all the services it maintains. In VServer it simply will not start.

The issue for me started a lot earlier; I should have seen this coming, really. When I tried the initial setup I went down the usual route (at least for me): get the daemontools-installer Debian package and build the binaries. I did this in the VM, obviously, because that is where I wanted to install daemontools. Here is what happened:
$ build-daemontools       

This script unpacks the daemontools source into a directory, and
compiles it to produce a binary daemontools*.deb file.
...
Press ENTER to continue...
Attempting to apply patches located in
/usr/src/daemontools-installer/patches...
/usr/src/daemontools-installer/patches/errno.patch
patching file src/error.h
/usr/src/daemontools-installer/patches/fileutils.patch
patching file src/rts.tests
dh_testdir
package/compile
Linking ./src/* into ./compile...
Compiling everything in ./compile...
make[1]: Entering directory `/tmp/daemontools/admin/daemontools-0.76/compile'
sh find-systype.sh > systype
rm -f compile
sh print-cc.sh > compile
chmod 555 compile
./compile byte_chr.c
./compile byte_copy.c
./compile byte_cr.c
./compile byte_diff.c
...
make[1]: Leaving directory `/tmp/daemontools/admin/daemontools-0.76/compile'
Copying commands into ./command...
touch build-stamp
dh_testdir
dh_testroot
dh_clean -k
dh_clean: Compatibility levels before 4 are deprecated.
dh_installdirs
dh_installdirs: Compatibility levels before 4 are deprecated.
mkdir -p debian/daemontools/package/admin/daemontools-0.76
mkdir -p debian/daemontools/command
mkdir -p debian/daemontools/usr/share/daemontools
mkdir -p debian/daemontools/service
cp -a command debian/daemontools/package/admin/daemontools-0.76
cp -a compile debian/daemontools/package/admin/daemontools-0.76
cp -a package debian/daemontools/package/admin/daemontools-0.76
cp -a src debian/daemontools/package/admin/daemontools-0.76
dh_link package/admin/daemontools-0.76/package usr/share/daemontools/package
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76 package/admin/daemontools
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/envdir command/envdir
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/envuidgid command/envuidgid
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/fghack command/fghack
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/multilog command/multilog
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/pgrphack command/pgrphack
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/readproctitle
command/readproctitle
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/setlock command/setlock
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/setuidgid command/setuidgid
dh_link: Compatibility levels before 4 are deprecated.
dh_link package/admin/daemontools-0.76/command/softlimit command/softlimit
...
dh_gencontrol
dh_gencontrol: Compatibility levels before 4 are deprecated.
dh_md5sums
dh_md5sums: Compatibility levels before 4 are deprecated.
dpkg-deb -b debian/daemontools ..
dpkg-deb: building package `daemontools' in `../daemontools_0.76-9_i386.deb'.

It seems that all went ok

Do you want to remove all files in /tmp/daemontools,
except daemontools_0.76-9_i386.deb now? [Yn]
Removing files... done

Do you want to install daemontools_0.76-9_i386.deb now? [Yn] n

Do you want to purge daemontools-installer now? [yN]

Good luck!

So the compile succeeded, but the subsequent packaging failed with "dh_link: Compatibility levels before 4 are deprecated." errors. By the looks of it the makefile was not built to handle these kinds of errors, because at the end I was told that all seems OK - which is of course not the case; the package is empty.

Well, I managed to build it somewhere else and installed the binaries that way into the Virtual Machine. But then I noticed the issue above; in other words, the services would not run because the root process was not started.

After searching around on the web I found - of course - a post outlining the same issue. As usual you go through the same steps and pain just to find out that someone else found the same problem and already fixed it.

The solution is to start the root daemontools process just like any other process. The post has a script that I include below (in case it gets lost in the Intertubes):
$ cat /etc/init.d/svscanboot 

#! /bin/sh
#
# daemontools for launching /etc/svscanboot from sysvinit instead of /sbin/init.
#
# author: dkg

set -e

PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
DESC="daemontools"
NAME=svscanboot
DAEMON=/command/svscanboot

PIDFILE=/var/run/$NAME.pid
SCRIPTNAME=/etc/init.d/$NAME

# Gracefully exit if the package has been removed.
test -x $DAEMON || exit 0

#
# Function that starts the daemon/service.
#
d_start() {
start-stop-daemon --start --background --make-pidfile --quiet --pidfile $PIDFILE \
--exec $DAEMON
}

#
# Function that stops the daemon/service.
#
d_stop() {
start-stop-daemon --stop --quiet --pidfile $PIDFILE \
--name $NAME
echo "not cleaning up svscan and readproctitle subprocesses
appropriately. dkg is lazy."
}

#
# Function that sends a SIGHUP to the daemon/service.
#
d_reload() {
start-stop-daemon --stop --quiet --pidfile $PIDFILE \
--name $NAME --signal 1
}

case "$1" in
start)
echo -n "Starting $DESC: $NAME"
d_start
echo "."
;;
stop)
echo -n "Stopping $DESC: $NAME"
d_stop
echo "."
;;
restart|force-reload)
#
# If the "reload" option is implemented, move the "force-reload"
# option to the "reload" entry above. If not, "force-reload" is
# just the same as "restart".
#
echo -n "Restarting $DESC: $NAME"
d_stop
sleep 1
d_start
echo "."
;;
*)
# echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2
echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2
exit 1
;;
esac

exit 0

Now, other posts mention that there is also a "fakeinit" style - but it did not work for me, and I rather believe that this is the old name for "plain" mentioned in the Init Styles document I linked above.

Goes to show that a lot is unclear about VServer. But that is often the case with open-source tools and systems. It is up to us, the IT people, to help out and close those gaps while contributing to the community.

Wednesday, January 14, 2009

Looking for the perfect beat

I mentioned earlier that I used to go to all sorts of clubs back when it was cool to wear a big yellow smilie face on the back of your jacket. No kidding!

You listen to all sorts of music and songs, and older ones go while new ones come. Sure, beats and lyrics do get recycled and mixed up - maybe the younger generations do not realize the rip-off and enjoy the canned, uninspired and lame version. But there are some songs that stand out. They leave a mark on a deeper level, affecting you mentally and emotionally. It could be that the song reminds you of that last great summer of school, or your first kiss - or generally of having had a blast with friends while rocking, dancing or singing to this one particular track.

Now, listening to Techno makes things even harder, because the tracks are produced by a handful of producers using different aliases for every record they release. In other words, names are meaningless.

Years later I am trying to collect as many of these tracks as I can find. I will try to upload them here randomly for your entertainment. Please bear in mind that they are from a time long gone, so they may sound silly nowadays. But I do not care; I have a special place for them in my heart.

The following track was - for me - the culmination of Electronic Music. I listened to it on the way to the club, in the VW Beetle I had packed with friends. I had an amp and two 30" woofers in the back, in that small luggage compartment below the rear window. The whole system did not cost more than about 150 Deutsche Mark - a steal. The board carrying the woofers was an old desktop plate my father got from the school he worked at.

We were going 120KM/h (or about 80mph) and the woofers were muting any other noise present in the car. No engine noise, no wind noise - nothing but the sounds of music.



The name of the track is "Stakker Humanoid" by "Humanoid" (check out its Wiki info). It sort of builds up as it goes, and I always loved that there is a distinct, sharper synth beat at about 1:30 minutes in. Listening to it really loud is the key here! This track is pure Electronic - no natural sounds, fully synthesized and computer generated. Pure bliss, ahh.

Update: I just realized that this track is now exactly 20 years old (it was released in 1989 in Germany). Happy birthday!

iPhone as an iPod

I have been using my iPhone for a while now; I bought one about a month after they were first released - yeah, that is how long I could refrain from being the typical fanboy. While I use quite a list of apps more or less daily (I will post about this later), I found that I still do not know all the little tricks it has in store.

Only recently I wondered why I sometimes see the iPod controls on the locked start screen. After fiddling around for a few minutes I found that double-clicking the main button reveals these controls. First, here is what the normal locked screen looks like:


Below you can see how the screen changes when you double-click the main button located below the screen. It allows you to use the iPhone more like an iPod without having to unlock the device and click through screens to get to the iPod application - which is even more annoying when you have already navigated away before the device was locked, for example while listening to music and reading emails.

Once you have the controls you can start and stop the music and navigate back and forth through your playlist. I used to do that with the button in the microphone of the iPhone headset. That is still fine, but this is an alternative in case you already have your phone in your hand. Here is the screenshot of the iPod controls:


By the way, double clicking a second time hides the controls again.

Another little trick: say you have scrolled through the pages of applications and web links on your iPhone's main screen, you have reached page 5 or so, and you suddenly realize that you need to look up an address using the Map application. Sure, you can "swish and flick" your way back to the first screen, but a much easier way is to single-click the main iPhone button positioned under the screen. It scrolls all the way back to the first screen. Nice!

Saturday, January 10, 2009

Eternal Moments

There are so many things in life you encounter where you say to yourself: I will never forget this. But they are gone. I cannot remember many of these special moments in my life worth remembering. Songs that we listened to, movies we watched, radio stations we tuned into every week to "tape" our favorite show.

But some moments do embed themselves into our memory never to be forgotten. Why is that so? Why not always? I assume I have to cherish those memories and try to keep them alive as long as I can. Sometimes I start wondering if they actually happened - or if they are just mashed together snippets of memories, dreams and wishful thinking. A fickle thing indeed.

Yet there are some moments you recall exactly. I remember some from my youth; one is when we were in a "disco" (or club, as it is called in native English speaking countries nowadays) and the music style of the time was Techno. The name of the club was "Easy" and it was, in comparison, one of the larger clubs, fitting 3,000 people (so I heard). They had the typical green laser show that you could find in many discos that wanted to be special.

I was chatting with a friend of mine I had met by accident - the club was at least 70 km (45 miles) away from our home town, yet you could always find at least a handful of people you knew from there. A song came on and the lasers kicked in. We started dancing to the song in true Techno style: a combination of break dancing and frantic hand and body movements.



I recall having the time of my life. I felt free, young and without any sorrows. Ah, those were the days...

VServer is not Xen

Recently I had to work on a Virtual Machine (VM) running on VServer. In the past I used Xen to create virtual machines, but due to the nature of the task VServer seemed more appropriate. I only have to run two Debian Etch VMs on a Debian Etch host. Because of the much narrower interface to the Operating System (OS), it makes sense for VServer hosts to run without much of the overhead - and therefore faster as well.

There are a few things that are quite nice about the lesser abstraction of VServer compared to Xen. For example, I found that copying a Virtual Machine is much simpler, and files can be copied into place from the master, because the file systems of the VMs are simply directories within the master file system.

One thing I noticed, though, is that it is much more difficult to run certain daemons in the VMs and/or on the master at the same time. The separation in Xen completely splits master and VM at the kernel level, so running the same daemon on the same port is a natural fit. Nothing to be done. Not so with VServer.

I tried to run SSH, NTP and SNMP on the master and on the two VMs I was setting up. The first issue I ran into was SSH. SSH on the master listens on all network addresses, configured as such:
ListenAddress 0.0.0.0

When you then try to start the SSH daemon on the VMs, you get an error that the address is already in use - by the master, of course! Master and Virtual Machines share the network layer, and this is now causing a problem.

The issue in itself is solved by setting the listening address to a specific one, namely the address of the master:
ListenAddress 192.168.1.100

Then it binds to the default port only on that interface, and the VMs are free to bind their daemons to their own IPs.

The second issue I ran into was NTP. I tried to run it the same way as the SSH daemon, but since the listening address is not something you can specify in /etc/ntp.conf, the NTP daemon binds to all interfaces and we get the same error on the VMs as mentioned above.

I found it best to remove NTP completely from the VMs and only run it on the master. After a few weeks of observation it seems that the time is "passed on" to the VMs; in other words, their time stays in sync. This somewhat makes sense considering the thin layer VServer uses to run the Virtual Machines. They simply use the same internal clock, and if the master is in sync then so are the VMs.

Friday, January 9, 2009

Odd "ps" output

While trying to figure out when I started a particular process, I noticed that the normal "ps aux" or "ps -eF" does not show the actual start date, but - depending on how long the task has already been running - possibly only the year. For example:
[02:09:36 root@lv1-cpq-bl-17 bin]# ps aux
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.0 0.0 1944 656 ? Ss 2008 0:01 init [2]
root 2 0.0 0.0 0 0 ? S 2008 0:00 [migration/0]
root 3 0.0 0.0 0 0 ? SN 2008 0:00 [ksoftirqd/0]
root 4 0.0 0.0 0 0 ? S 2008 0:00 [events/0]
root 5 0.0 0.0 0 0 ? S 2008 0:00 [khelper]
...
root 2851 0.0 3.9 1243532 40740 ? Sl Jan07 1:14 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3521 0.1 4.4 1250100 45828 ? Sl Jan07 2:22 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3629 0.0 4.1 1237900 42880 ? Sl Jan07 0:28 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3799 0.1 5.9 1268268 61260 ? Sl Jan07 3:17 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 12274 0.0 0.0 3432 880 pts/4 R+ 03:25 0:00 ps aux

So this varies from the time of day for a process started today, to a month/day combination, all the way down to just the year for a process started last year.

But when exactly?

Digging into the "man ps" details and using the "trial and error" approach, I found that a custom output format gives me what I needed:
[root]# ps -e -o user,pid,pcpu,start,stime,time,vsz,rssize,ni,args
USER PID %CPU STARTED STIME TIME VSZ RSS NI COMMAND
root 1 0.0 Jul 01 2008 00:00:01 1944 656 0 init [2]
root 2 0.0 Jul 01 2008 00:00:00 0 0 - [migration/0]
root 3 0.0 Jul 01 2008 00:00:00 0 0 19 [ksoftirqd/0]
root 4 0.0 Jul 01 2008 00:00:00 0 0 -5 [events/0]
root 5 0.0 Jul 01 2008 00:00:00 0 0 -5 [khelper]
...
root 2851 0.0 Jan 07 Jan07 00:01:14 1243532 40740 0 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3521 0.1 Jan 07 Jan07 00:02:22 1250100 45828 0 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3629 0.0 Jan 07 Jan07 00:00:28 1237900 42880 0 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 3799 0.1 Jan 07 Jan07 00:03:17 1268268 61260 0 /usr/lib/jvm/java-1.5.0-sun/jre/bin/java
root 12275 0.0 03:25:38 03:25 00:00:00 3432 880 0 ps -e -o user,pid,pcpu,start,
stime,time,vsz,rssize,ni,args

The "start" format option results in the "STARTED" column above and shows exactly what I needed. The last thing, I guess, would be to set the "PS_FORMAT" environment variable if I needed this permanently.