Mercurial > ~darius > hgwebdir.cgi > wh1080
diff db/insertwunderground.c @ 0:9dab44dcb331
Initial commit of Greg's code from http://www.lemis.com/grog/tmp/wh1080.tar.gz
author | Daniel O'Connor <darius@dons.net.au> |
---|---|
date | Tue, 09 Feb 2010 13:44:25 +1030 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/db/insertwunderground.c Tue Feb 09 13:44:25 2010 +1030 @@ -0,0 +1,237 @@ +/* + * Read in Wunderground comma-delimited files and convert them into INSERT + * commands for MySQL. + * + * $Id: insertwunderground.c,v 1.5 2009/12/09 03:27:55 grog Exp $ + */ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + + +#define BUFSIZE 1024 +char attribute_list [BUFSIZE]; +char buf [BUFSIZE]; +char sql_command [BUFSIZE]; +int attribute_count; +char *station; /* name of station */ +char *date; /* and date from second parameter, if present */ + +/* + * Convert the Wunderground field names to our table. This is confused by the + * fact that Wunderground can't make up its mind what to call the fields, so + * multiple names are possible, and they can have spaces in them. If we don't + * care about the field, we have a null translation. + * + * Some fields are problems: time is really two fields, date and time, and + * precipitation is sometimes in cm rather than mm. We special-case them. + */ +struct attributes +{ + char *wundername; /* name supplied in file */ + char *column_name; /* and the names we use */ +} attributes [] = { + {"Clouds", "clouds"}, + {"Conditions", "conditions"}, + {"Dew PointC", "outside_dewpoint"}, + {"DewpointC", "outside_dewpoint"}, + {"Events", "events"}, + {"Gust SpeedKm/h", "wind_gust"}, + {"HourlyPrecipMM", "rain"}, + {"Humidity", "outside_humidity"}, + {"PrecipitationCm", ""}, + {"PressurehPa", "pressure_msl"}, /* I assume this is correct */ + {"Sea Level PressurehPa", "pressure_msl"}, + {"SoftwareType", ""}, + {"TemperatureC", "outside_temp"}, + {"Time", "date"}, + {"TimeEST", "time"}, + {"VisibilityKm", "visibility"}, + {"Wind Direction", "wind_direction_text"}, + {"Wind SpeedKm/h", "wind_speed"}, + {"WindDirection", "wind_direction_text"}, + {"WindDirectionDegrees", "wind_direction"}, + {"WindSpeedGustKMH", "wind_gust"}, + {"WindSpeedKMH", "wind_speed"}, + {"dailyrainCM", "" }}; + +int columns = sizeof (attributes) / sizeof (struct attributes); + +int field [50]; /* build up a table of the fields here */ + +void skipblanks (char **cp) +{ + while (**cp == ' ') + (*cp)++; +} + +int main (int argc, char *argv []) +{ + char *cp; /* pointers in buffers */ + char *ep; /* pointers in buffers */ + char *fp; + char fieldname [64]; + int fields = 0; /* becomes number of fields in table */ + int myfield; + int i; + + if (argc < 2) + { + fprintf (stderr, "Usage: $0 < file STATION_ID\n"); + exit (1); + } + station = argv [1]; /* arg is station name */ + if (argc > 2) /* date specified too */ + date = argv [2]; + + /* + * The first useful line should be a list of field names. Read it in and + * decide what we want to use. + */ + do + { + if (! fgets (attribute_list, BUFSIZE - 1, stdin)) + exit (0); /* empty file, just ignore */ + } + while (strlen (attribute_list) < 4); /* I'm not sure how long the + * initial junk is, but this + * should cover it */ + /* + * Untidy copy of field names. + */ + cp = attribute_list; + while (1) + { + fp = fieldname; + skipblanks (&cp); + while ((*cp != ',') && (*cp !='0') && (*cp !='<')) + *fp++ = *cp++; /* copy the character */ + *fp = '\0'; /* and delimit it */ + for (i = 0; i < columns; i++) + if (! strcmp (fieldname, attributes [i].wundername)) /* found it */ + { + field [fields] = i; + break; + } + if (i == columns) /* not found */ + { + fprintf (stderr, "Warning: can't find field name %s\n", fieldname); + field [fields] = -1; /* don't use this field */ + } + if (strlen (attributes [i].column_name) == 0) /* no column */ + field [fields] = -1; /* */ + if ((*cp == '<') || (*cp == '\0')) /* end of useful info */ + break; + cp++; /* skip the delimiter */ + fields++; /* done another field */ + } + /* + * On exit from the loop, fields contains the number of fields, and field [] + * is a translation table. + */ + while (1) + { + if (! fgets (buf, BUFSIZE - 1, stdin)) + exit (0); /* empty file, just ignore */ + if (memcmp (buf, "20", 2) == 0) /* we have a line starting with at date */ + { + char *time; + + /* walk through the date field and convert it to two fields */ + sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date, time"); + /* first get the names */ + for (myfield = 1; myfield < fields; myfield++) + { + if (field [myfield] >= 0) + sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); + } + sprintf (&sql_command [strlen (sql_command)], ") VALUES ("); + /* and now the values */ + cp = buf; + while (*cp != ' ') + cp++; + *cp++ = '\0'; /* split first field */ + skipblanks (&cp); /* this shouldn't be necessary */ + time = cp; /* second is time */ + while (*cp != ',') + cp++; /* delimit */ + *cp++ = '\0'; /* delimit second field */ + /* + * We now have buf pointing to the date, time pointing to the time, both + * \0 delimited, and cp pointing to the rest of the buffer. + */ + sprintf (&sql_command [strlen (sql_command)], + "\"%s\", \"%s\", \"%s\"", + station, + buf, + time ); + /* + * Do the rest + */ + for (myfield = 1; myfield < fields; myfield++) + { + skipblanks (&cp); + ep = cp; + while ((*ep != ',') && (*ep != '\n')) + ep++; + *ep++ = '\0'; /* make a string of it */ + if (field [myfield] >= 0) + sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); + cp = ep; /* point to next field */ + /* + * Strictly speaking this isn't necessary, since we have a counted + * number of fields, but I don't trust the data. + */ + if (*cp == '\0') + break; + } + sprintf (&sql_command [strlen (sql_command)], ");"); + puts (sql_command); + } + else if (isdigit (buf [0])) /* must be a time */ + { + sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date"); + + + /* first get the names */ + for (myfield = 0; myfield < fields; myfield++) + { + if (field [myfield] >= 0) + sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); + } + sprintf (&sql_command [strlen (sql_command)], ") VALUES (\"%s\", \"%s\"", + station, + date ); + /* + * Do the rest + */ + cp = buf; + for (myfield = 0; myfield < fields; myfield++) + { + skipblanks (&cp); + ep = cp; + while ((*ep != ',') && (*ep != '\n')) + ep++; + *ep++ = '\0'; /* make a string of it */ + if (field [myfield] >= 0) + sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); + cp = ep; /* point to next field */ + /* + * Strictly speaking this isn't necessary, since we have a counted + * number of fields, but I don't trust the data. + */ + if (*cp == '\0') + break; + } + sprintf (&sql_command [strlen (sql_command)], ");"); + puts (sql_command); + } + } + return 0; +} +