Mercurial > ~darius > hgwebdir.cgi > wh1080
view db/insertwunderground.c @ 1:01496de9f722
Ignore build cruft.
author | Daniel O'Connor <darius@dons.net.au> |
---|---|
date | Tue, 09 Feb 2010 13:48:22 +1030 |
parents | 9dab44dcb331 |
children |
line wrap: on
line source
/* * Read in Wunderground comma-delimited files and convert them into INSERT * commands for MySQL. * * $Id: insertwunderground.c,v 1.5 2009/12/09 03:27:55 grog Exp $ */ #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/file.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #define BUFSIZE 1024 char attribute_list [BUFSIZE]; char buf [BUFSIZE]; char sql_command [BUFSIZE]; int attribute_count; char *station; /* name of station */ char *date; /* and date from second parameter, if present */ /* * Convert the Wunderground field names to our table. This is confused by the * fact that Wunderground can't make up its mind what to call the fields, so * multiple names are possible, and they can have spaces in them. If we don't * care about the field, we have a null translation. * * Some fields are problems: time is really two fields, date and time, and * precipitation is sometimes in cm rather than mm. We special-case them. */ struct attributes { char *wundername; /* name supplied in file */ char *column_name; /* and the names we use */ } attributes [] = { {"Clouds", "clouds"}, {"Conditions", "conditions"}, {"Dew PointC", "outside_dewpoint"}, {"DewpointC", "outside_dewpoint"}, {"Events", "events"}, {"Gust SpeedKm/h", "wind_gust"}, {"HourlyPrecipMM", "rain"}, {"Humidity", "outside_humidity"}, {"PrecipitationCm", ""}, {"PressurehPa", "pressure_msl"}, /* I assume this is correct */ {"Sea Level PressurehPa", "pressure_msl"}, {"SoftwareType", ""}, {"TemperatureC", "outside_temp"}, {"Time", "date"}, {"TimeEST", "time"}, {"VisibilityKm", "visibility"}, {"Wind Direction", "wind_direction_text"}, {"Wind SpeedKm/h", "wind_speed"}, {"WindDirection", "wind_direction_text"}, {"WindDirectionDegrees", "wind_direction"}, {"WindSpeedGustKMH", "wind_gust"}, {"WindSpeedKMH", "wind_speed"}, {"dailyrainCM", "" }}; int columns = sizeof (attributes) / sizeof (struct attributes); int field [50]; /* build up a table of the fields here */ void skipblanks (char **cp) { while (**cp == ' ') (*cp)++; } int main (int argc, char *argv []) { char *cp; /* pointers in buffers */ char *ep; /* pointers in buffers */ char *fp; char fieldname [64]; int fields = 0; /* becomes number of fields in table */ int myfield; int i; if (argc < 2) { fprintf (stderr, "Usage: $0 < file STATION_ID\n"); exit (1); } station = argv [1]; /* arg is station name */ if (argc > 2) /* date specified too */ date = argv [2]; /* * The first useful line should be a list of field names. Read it in and * decide what we want to use. */ do { if (! fgets (attribute_list, BUFSIZE - 1, stdin)) exit (0); /* empty file, just ignore */ } while (strlen (attribute_list) < 4); /* I'm not sure how long the * initial junk is, but this * should cover it */ /* * Untidy copy of field names. */ cp = attribute_list; while (1) { fp = fieldname; skipblanks (&cp); while ((*cp != ',') && (*cp !='0') && (*cp !='<')) *fp++ = *cp++; /* copy the character */ *fp = '\0'; /* and delimit it */ for (i = 0; i < columns; i++) if (! strcmp (fieldname, attributes [i].wundername)) /* found it */ { field [fields] = i; break; } if (i == columns) /* not found */ { fprintf (stderr, "Warning: can't find field name %s\n", fieldname); field [fields] = -1; /* don't use this field */ } if (strlen (attributes [i].column_name) == 0) /* no column */ field [fields] = -1; /* */ if ((*cp == '<') || (*cp == '\0')) /* end of useful info */ break; cp++; /* skip the delimiter */ fields++; /* done another field */ } /* * On exit from the loop, fields contains the number of fields, and field [] * is a translation table. */ while (1) { if (! fgets (buf, BUFSIZE - 1, stdin)) exit (0); /* empty file, just ignore */ if (memcmp (buf, "20", 2) == 0) /* we have a line starting with at date */ { char *time; /* walk through the date field and convert it to two fields */ sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date, time"); /* first get the names */ for (myfield = 1; myfield < fields; myfield++) { if (field [myfield] >= 0) sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); } sprintf (&sql_command [strlen (sql_command)], ") VALUES ("); /* and now the values */ cp = buf; while (*cp != ' ') cp++; *cp++ = '\0'; /* split first field */ skipblanks (&cp); /* this shouldn't be necessary */ time = cp; /* second is time */ while (*cp != ',') cp++; /* delimit */ *cp++ = '\0'; /* delimit second field */ /* * We now have buf pointing to the date, time pointing to the time, both * \0 delimited, and cp pointing to the rest of the buffer. */ sprintf (&sql_command [strlen (sql_command)], "\"%s\", \"%s\", \"%s\"", station, buf, time ); /* * Do the rest */ for (myfield = 1; myfield < fields; myfield++) { skipblanks (&cp); ep = cp; while ((*ep != ',') && (*ep != '\n')) ep++; *ep++ = '\0'; /* make a string of it */ if (field [myfield] >= 0) sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); cp = ep; /* point to next field */ /* * Strictly speaking this isn't necessary, since we have a counted * number of fields, but I don't trust the data. */ if (*cp == '\0') break; } sprintf (&sql_command [strlen (sql_command)], ");"); puts (sql_command); } else if (isdigit (buf [0])) /* must be a time */ { sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date"); /* first get the names */ for (myfield = 0; myfield < fields; myfield++) { if (field [myfield] >= 0) sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); } sprintf (&sql_command [strlen (sql_command)], ") VALUES (\"%s\", \"%s\"", station, date ); /* * Do the rest */ cp = buf; for (myfield = 0; myfield < fields; myfield++) { skipblanks (&cp); ep = cp; while ((*ep != ',') && (*ep != '\n')) ep++; *ep++ = '\0'; /* make a string of it */ if (field [myfield] >= 0) sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); cp = ep; /* point to next field */ /* * Strictly speaking this isn't necessary, since we have a counted * number of fields, but I don't trust the data. */ if (*cp == '\0') break; } sprintf (&sql_command [strlen (sql_command)], ");"); puts (sql_command); } } return 0; }