diff db/insertwunderground.c @ 0:9dab44dcb331

Initial commit of Greg's code from http://www.lemis.com/grog/tmp/wh1080.tar.gz
author Daniel O'Connor <darius@dons.net.au>
date Tue, 09 Feb 2010 13:44:25 +1030
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/db/insertwunderground.c	Tue Feb 09 13:44:25 2010 +1030
@@ -0,0 +1,237 @@
+/*
+ * Read in Wunderground comma-delimited files and convert them into INSERT
+ * commands for MySQL.
+ *
+ * $Id: insertwunderground.c,v 1.5 2009/12/09 03:27:55 grog Exp $
+ */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+
+#define BUFSIZE 1024
+char attribute_list [BUFSIZE];
+char buf [BUFSIZE];
+char sql_command [BUFSIZE];
+int attribute_count;
+char *station;                                  /* name of station */
+char *date;                                     /* and date from second parameter, if present */
+
+/*
+ * Convert the Wunderground field names to our table.  This is confused by the
+ * fact that Wunderground can't make up its mind what to call the fields, so
+ * multiple names are possible, and they can have spaces in them.  If we don't
+ * care about the field, we have a null translation.
+ *
+ * Some fields are problems: time is really two fields, date and time, and
+ * precipitation is sometimes in cm rather than mm.  We special-case them.
+ */
+struct attributes
+{
+  char *wundername;                             /* name supplied in file  */
+  char *column_name;                            /* and the names we use */
+} attributes [] = {
+   {"Clouds", "clouds"},
+   {"Conditions", "conditions"},
+   {"Dew PointC", "outside_dewpoint"},
+   {"DewpointC", "outside_dewpoint"},
+   {"Events", "events"},
+   {"Gust SpeedKm/h", "wind_gust"},
+   {"HourlyPrecipMM", "rain"},
+   {"Humidity", "outside_humidity"},
+   {"PrecipitationCm", ""},
+   {"PressurehPa", "pressure_msl"},       /* I assume this is correct */
+   {"Sea Level PressurehPa", "pressure_msl"},
+   {"SoftwareType", ""},
+   {"TemperatureC", "outside_temp"},
+   {"Time", "date"},
+   {"TimeEST", "time"},
+   {"VisibilityKm", "visibility"},
+   {"Wind Direction", "wind_direction_text"},
+   {"Wind SpeedKm/h", "wind_speed"},
+   {"WindDirection", "wind_direction_text"},
+   {"WindDirectionDegrees", "wind_direction"},
+   {"WindSpeedGustKMH", "wind_gust"},
+   {"WindSpeedKMH", "wind_speed"},
+   {"dailyrainCM", "" }};
+
+int columns = sizeof (attributes) / sizeof (struct attributes);
+
+int field  [50];                                /* build up a table of the fields here */
+
+void skipblanks (char **cp)
+{
+  while (**cp == ' ')
+    (*cp)++;
+}
+
+int main (int argc, char *argv [])
+{
+  char *cp;                                     /* pointers in buffers */
+  char *ep;                                     /* pointers in buffers */
+  char *fp;
+  char fieldname [64];
+  int fields = 0;                               /* becomes number of fields in table */
+  int myfield;
+  int i;
+
+  if (argc < 2)
+  {
+    fprintf (stderr, "Usage: $0 < file STATION_ID\n");
+    exit (1);
+  }
+  station = argv [1];                           /* arg is station name */
+  if (argc > 2)                                 /* date specified too */
+    date = argv [2];
+
+  /*
+   * The first useful line should be a list of field names.  Read it in and
+   * decide what we want to use.
+   */
+  do
+  {
+    if (! fgets (attribute_list, BUFSIZE - 1, stdin))
+      exit (0);                                 /* empty file, just ignore  */
+  }
+  while (strlen (attribute_list) < 4);          /* I'm not sure how long the
+                                                 * initial junk is, but this
+                                                 * should cover it */
+  /*
+   * Untidy copy of field names.
+   */
+  cp = attribute_list;
+  while (1)
+  {
+    fp = fieldname;
+    skipblanks (&cp);
+    while ((*cp != ',') && (*cp !='0') && (*cp !='<'))
+      *fp++ = *cp++;                            /* copy the character */
+    *fp = '\0';                                 /* and delimit it */
+    for (i = 0; i < columns; i++)
+      if (! strcmp (fieldname, attributes [i].wundername)) /* found it */
+      {
+        field  [fields] = i;
+        break;
+      }
+    if (i == columns)                           /* not found */
+    {
+      fprintf (stderr, "Warning: can't find field name %s\n", fieldname);
+      field  [fields] = -1;                     /* don't use this field */
+    }
+    if (strlen (attributes [i].column_name) == 0) /* no column */
+      field [fields] = -1;                      /*  */
+    if ((*cp == '<') || (*cp == '\0'))          /* end of useful info */
+      break;
+    cp++;                                       /* skip the delimiter */
+    fields++;                                   /* done another field */
+  }
+  /*
+   * On exit from the loop, fields contains the number of fields, and field []
+   * is a translation table.
+   */
+  while (1)
+  {
+    if (! fgets (buf, BUFSIZE - 1, stdin))
+      exit (0);                                 /* empty file, just ignore  */
+    if (memcmp (buf, "20", 2) == 0)             /* we have a line starting with at date  */
+    {
+      char *time;
+
+      /* walk through the date field and convert it to two fields */
+      sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date, time");
+      /* first get the names */
+      for (myfield = 1; myfield < fields; myfield++)
+      {
+        if (field [myfield] >= 0)
+          sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name);
+      }
+      sprintf (&sql_command [strlen (sql_command)], ") VALUES (");
+      /* and now the values */
+      cp = buf;
+      while (*cp != ' ')
+        cp++;
+      *cp++ = '\0';                             /* split first field */
+      skipblanks (&cp);                         /* this shouldn't be necessary */
+      time = cp;                                /* second is time */
+      while (*cp != ',')
+        cp++;                                   /* delimit */
+      *cp++ = '\0';                             /* delimit second field */
+      /*
+       * We now have buf pointing to the date, time pointing to the time, both
+       * \0 delimited, and cp pointing to the rest of the buffer.
+       */
+      sprintf (&sql_command [strlen (sql_command)],
+               "\"%s\", \"%s\", \"%s\"",
+               station,
+               buf,
+               time );
+      /*
+       * Do the rest
+       */
+      for (myfield = 1; myfield < fields; myfield++)
+      {
+        skipblanks (&cp);
+        ep = cp;
+        while ((*ep != ',') && (*ep != '\n'))
+          ep++;
+        *ep++ = '\0';                           /* make a string of it */
+        if (field [myfield] >= 0)
+          sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp);
+        cp = ep;                                /* point to next field */
+        /*
+         * Strictly speaking this isn't necessary, since we have a counted
+         * number of fields, but I don't trust the data.
+         */
+        if (*cp == '\0')
+          break;
+      }
+      sprintf (&sql_command [strlen (sql_command)], ");");
+      puts (sql_command);
+    }
+    else if (isdigit (buf [0]))                 /* must be a time  */
+    {
+      sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date");
+
+
+      /* first get the names */
+      for (myfield = 0; myfield < fields; myfield++)
+      {
+        if (field [myfield] >= 0)
+          sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name);
+      }
+      sprintf (&sql_command [strlen (sql_command)], ") VALUES (\"%s\", \"%s\"",
+               station,
+               date );
+      /*
+       * Do the rest
+       */
+      cp = buf;
+      for (myfield = 0; myfield < fields; myfield++)
+      {
+        skipblanks (&cp);
+        ep = cp;
+        while ((*ep != ',') && (*ep != '\n'))
+          ep++;
+        *ep++ = '\0';                           /* make a string of it */
+        if (field [myfield] >= 0)
+          sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp);
+        cp = ep;                                /* point to next field */
+        /*
+         * Strictly speaking this isn't necessary, since we have a counted
+         * number of fields, but I don't trust the data.
+         */
+        if (*cp == '\0')
+          break;
+      }
+      sprintf (&sql_command [strlen (sql_command)], ");");
+      puts (sql_command);
+    }
+  }
+  return 0;
+}
+